# -*- coding: utf-8 -*-
"""
Created on Sat Mar  9 16:37:07 2024

@author: yys
"""



## First put the raw files into one folder and batch-convert them to ASCII, then run this program
import re
import os
import pandas as pd
import numpy as np





# 定义提取数字的函数
def extract_numbers(cell):
    """Return the first run of digits in ``cell`` as an ``int``.

    ``cell`` is converted with ``str()`` before searching, so any value
    may be passed in. Returns ``None`` when the text contains no digit.
    """
    found = re.search(r'\d+', str(cell))
    return int(found.group()) if found else None
year = str(2019)

# Root data directory: the 5 m input tree and the 30 m output tree both
# live directly underneath it.
folder_pathls = "J:\\BJ TK"
folder_path = folder_pathls + "\\5m\\" + year

# Name of the height column in the input files and its name in the output.
# NOTE(review): "julidibiao" is presumably pinyin for "distance from the
# ground surface" -- confirm with the data owner.
HEIGHT_COLUMN = 'geopotential_height'
OUTPUT_HEIGHT_COLUMN = 'geopotential_height_julidibiao'


def resample_profile(profile, step=30, decimals=4):
    """Thin a profile to the rows whose relative height is a multiple of ``step``.

    The value in the first row / first column is subtracted from the
    ``geopotential_height`` column, so heights become relative to the first
    record. Rows whose first-column value is then an exact multiple of
    ``step`` are kept; every column after the first is rounded to
    ``decimals`` decimal places and the height column is renamed to
    ``geopotential_height_julidibiao``.

    NOTE(review): the height is read positionally (column 0), exactly as the
    original script did, so ``geopotential_height`` is assumed to be the
    first column of the file -- confirm against the input format.

    Parameters
    ----------
    profile : pandas.DataFrame
        Numeric table containing a ``geopotential_height`` column. It is not
        modified.
    step : int or float, default 30
        Height interval to keep. Matching uses an exact ``% step == 0``
        test, so heights that are not exact multiples (including NaN) are
        dropped.
    decimals : int, default 4
        Number of decimal places kept for the non-height columns.

    Returns
    -------
    pandas.DataFrame
        The selected rows as floats, in their original order. A profile
        without rows yields an empty frame with the renamed columns.

    Raises
    ------
    KeyError
        If ``profile`` has no ``geopotential_height`` column.
    """
    renamed = {HEIGHT_COLUMN: OUTPUT_HEIGHT_COLUMN}

    heights = profile[HEIGHT_COLUMN]  # raises KeyError when the column is missing
    if len(profile) == 0:
        # Nothing to reference the heights against; avoid iloc[0, 0] failing.
        return pd.DataFrame(columns=profile.columns).rename(columns=renamed)

    # Work on a copy so the caller's frame keeps its absolute heights.
    data = profile.copy()
    data[HEIGHT_COLUMN] = heights - profile.iloc[0, 0]
    values = data.to_numpy(dtype=float)

    # Select the wanted levels with a boolean mask. Unlike filling a
    # zero-initialised buffer and then stripping all-zero rows, this keeps a
    # selected level whose values all happen to be 0 (e.g. the ground row).
    on_grid = values[:, 0] % step == 0
    kept = values[on_grid]  # boolean indexing returns a copy
    kept[:, 1:] = np.round(kept[:, 1:], decimals=decimals)

    return pd.DataFrame(kept, columns=profile.columns).rename(columns=renamed)


if __name__ == "__main__":
    # Make sure the output folder exists; to_csv does not create it.
    output_dir = folder_pathls + '\\30m\\' + year
    os.makedirs(output_dir, exist_ok=True)

    # Names of all CSV files in the input folder.
    csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]

    # Convert every input file and write the thinned profile next to the
    # other 30 m results.
    for csv_file in csv_files:
        file_path = os.path.join(folder_path, csv_file)
        print('提取'+csv_file)
        # The first 10 characters of the file name identify the output file.
        # NOTE(review): inputs sharing the same 10-character prefix would
        # overwrite each other -- confirm the naming scheme.
        name = csv_file[0:10]
        current_data = pd.read_csv(file_path)
        df = resample_profile(current_data)
        df.to_csv(os.path.join(output_dir, name + '_30m.csv'),
                  index=False, header=True, encoding='utf-8')

